Project Overview 🧬

This project showcases a full bulk RNA-seq analysis pipeline as a portfolio piece for a CV. It includes QC, normalization, differential expression, interactive visualization, and interactive tables, demonstrating skills in R, visualization, and reproducible reporting.

Data 📊

For this showcase, we use the `airway` dataset from Bioconductor: RNA-seq data from human airway smooth muscle cells, comparing dexamethasone-treated samples with untreated controls.

# Attach the packages this section relies on: the example dataset, the
# differential-expression engine, and dplyr (for `%>%` and `mutate()`).
# Re-attaching an already loaded package is harmless.
library(airway)
library(DESeq2)
library(dplyr)

# Load the example dataset and make "untrt" the reference level of the
# treatment factor, so it serves as the baseline of the comparison.
data(airway)
airway$dex <- relevel(airway$dex, "untrt")

# Build the DESeq2 dataset with treatment as the only design factor
dds <- DESeqDataSet(airway, ~ dex)

# Pre-filter: keep genes with a count of at least 10 in at least 4 samples
keep <- rowSums(counts(dds) >= 10) >= 4
dds <- dds[keep, ]

# Run DE
dds <- DESeq(dds)

# `alpha` is the significance level that independent filtering is tuned for.
# It defaults to 0.1, so set it to the 0.05 cutoff applied to `padj` below.
res <- results(dds, alpha = 0.05)

# Prepare a plain data frame with one row per gene for tables and plots
res_df <- as.data.frame(res) %>%
  dplyr::mutate(
    gene = rownames(.),
    # Classified before NA padj values are replaced: an NA comparison never
    # matches a case_when() condition, so those genes fall through to "NS".
    sig = dplyr::case_when(
      padj < 0.05 & log2FoldChange > 1  ~ "Up",
      padj < 0.05 & log2FoldChange < -1 ~ "Down",
      TRUE ~ "NS"
    ),
    # Missing adjusted p-values are shown as 1 (not significant)
    padj = dplyr::coalesce(padj, 1)
  )

Interactive Tables 📝

DE Results Table

# Interactive per-gene results table: column filters on top, 10 rows per
# page, horizontal scrolling for the wide set of columns.
DT::datatable(
  res_df,
  filter = "top",
  options = list(pageLength = 10, scrollX = TRUE)
)

Summary Table

# Number of genes in each significance category ("Up", "Down", "NS")
sum_table <- dplyr::summarise(
  dplyr::group_by(res_df, sig),
  count = dplyr::n()
)

# dom = "t" asks DataTables to render only the table itself
DT::datatable(sum_table, options = list(dom = "t"))

PCA 📈

# Transform the counts with vst(), then ask plotPCA() for the per-sample
# coordinates (returnData = TRUE) instead of a ready-made plot.
vsd <- vst(dds)
pca_data <- plotPCA(vsd, intgroup = "dex", returnData = TRUE)

# Scatter of the first two principal components, coloured by treatment and
# labelled with the sample name just above each point.
pca_plot <- ggplot(
  data = pca_data,
  mapping = aes(x = PC1, y = PC2, colour = dex, label = name)
) +
  geom_point(size = 5, alpha = 0.6) +
  geom_text(vjust = -1.5, size = 3) +
  scale_colour_manual(values = c("#1f77b4", "#ff7f0e")) +
  labs(title = "PCA of Bulk RNA-seq Samples") +
  theme_minimal(base_size = 14)

pca_plot

Interactive Volcano Plot 🌋

# Precompute the y-axis value. An adjusted p-value that underflows to 0 would
# give -log10(0) = Inf, which cannot be drawn at a finite position, so clamp
# padj to the smallest positive double first. Values with padj > 0 are
# unchanged.
volc_df <- res_df
volc_df$neg_log10_padj <- -log10(pmax(volc_df$padj, .Machine$double.xmin))

# Volcano plot: effect size on x, significance on y, coloured by category.
# `text` is not a ggplot2 aesthetic; it is carried along so that ggplotly()
# can show the gene name in the hover tooltip.
volc <- ggplot(
  volc_df,
  aes(x = log2FoldChange, y = neg_log10_padj, color = sig, text = gene)
) +
  geom_point(alpha = 0.7, size = 3) +
  scale_color_manual(
    values = c("Up" = "#e41a1c", "Down" = "#377eb8", "NS" = "#4daf4a")
  ) +
  # Dashed guides mark the thresholds used for the "Up"/"Down" calls
  geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black") +
  theme_minimal(base_size = 14) +
  ggtitle("Bulk RNA-seq Volcano Plot") +
  xlab("Log2 Fold Change") +
  ylab("-Log10 Adjusted P-value")

# Convert to interactive plotly
volc_plotly <- ggplotly(volc, tooltip = c("text", "x", "y"))
volc_plotly